##############################################################################
# File-Name: 09_analysis_iv_table16.r
# Purpose: Analysis primary outcomes
# Output: SM, Figures 5, 6, 7, 8, 9, 10, 11, 12, 15, 19
# Machine: macOS Ventura 13.5.1
# R version 4.3.1 (2023-06-16)
##############################################################################



# Packages ----------------------------------------------------------------
pacman::p_load(tidyverse, scales,  knitr, srvyr, extrafont, 
               rebus, broom, tidyr, lubridate, here, ggdist, ggtext, ggalt, janitor, 
               broom, tidyr,  stargazer, janitor, estimatr, list)


source(here("code","_utils.r"))

# Data wrangling ----------------------------------------------------------

# Load the processed data set.
d <- readRDS(here("data", "all_processed_data.rds"))


# Convert the data to the long format -------------------------------------

# Beliefs in misinformation, long format: one row per response_id x item,
# with the value of `false_<k>_true` stored in `true` and the item label
# reduced to `false_<k>`.
belief <- d %>%
    select(
        response_id,
        "false_1_true", "false_2_true", "false_3_true", "false_4_true"
    ) %>%
    # Only the four belief columns remain after select(), so every column
    # except the id is pivoted.
    pivot_longer(
        cols = -response_id,
        names_to = "item",
        values_to = "true"
    ) %>%
    mutate(item = str_remove(item, "_true"))

# Recall/exposure, long format: same layout, value stored in `exposure`.
# NOTE: the object name `exp` is also the name of the variable used after the
# bar in the IV formulas further down; the name is kept unchanged here.
exp <- d %>%
    select(
        response_id,
        "false_1_exp", "false_2_exp", "false_3_exp", "false_4_exp"
    ) %>%
    pivot_longer(
        cols = -response_id,
        names_to = "item",
        values_to = "exposure"
    ) %>%
    mutate(item = str_remove(item, "_exp"))

# Join belief and exposure. Both tables share exactly `response_id` and
# `item`, so the keys are spelled out instead of relying on a natural join.
d_ <- left_join(belief, exp, by = c("response_id", "item"))

# Merge everything with the main dataset.
# NOTE(review): this is still a natural join on every column name shared with
# `d` -- presumably only `response_id`; confirm and then set `by` explicitly.
d_ <- left_join(d_, d)


# Model -------------------------------------------------------------------
## Belief false news ~ exposure by item
## Belief false news ~ exposure + controls by item
## IV: Belief false news ~ exposure + controls | experiment

#' Build the regression formula used by the models in this script
#'
#' Assembles `depvar ~ indvar + <controls>` as text and converts it with
#' `as.formula()`. The controls are the fixed set of pre-treatment covariates
#' listed in the body. With `cace = TRUE` the right-hand side becomes the
#' two-part form `indvar + <controls> | exp + <controls>`, which is the
#' formula passed to `iv_robust()` elsewhere in this script; `exp` is the
#' variable placed after the bar (the "experiment" per the script's section
#' comment -- presumably the treatment assignment; confirm against the data).
#'
#' @param depvar Single string naming the outcome variable.
#' @param indvar Single string naming the regressor of interest.
#' @param cace Single `TRUE`/`FALSE`. If `TRUE`, return the two-part IV
#'   formula; otherwise the plain covariate-adjusted formula.
#' @return A formula object.
build_reg_expression <- function(depvar, indvar, cace = FALSE) {
    # as.formula() silently keeps only the first element of a character
    # vector, so reject anything that is not a single string up front.
    stopifnot(
        is.character(depvar), length(depvar) == 1L, !is.na(depvar),
        is.character(indvar), length(indvar) == 1L, !is.na(indvar),
        is.logical(cace), length(cace) == 1L, !is.na(cace)
    )

    # Pre-treatment covariates
    controls <- c(
        # demographics
        "w1_q_gender",
        "q_race",
        "w1_q_education",
        "w1_q_age_num",
        "income_num",

        # trust
        "as.numeric(w1_q_trust_government)",
        "as.numeric(w1_q_trust_congress)",
        "as.numeric(w1_q_trust_supreme_court)",
        "as.numeric(w1_q_trust_electoral_authorities)",
        "as.numeric(w1_q_trust_globo)",
        "as.numeric(w1_q_trust_news_channels)",
        "w1_q_legitimacy",

        # polarization
        "w1_affective_pol_bolsonaro",
        "w1_affective_pol_lula",

        # ideology
        "w1_ideology_you",
        "w1_q_politics_num",

        # social media usage
        "w1_q_whatsapp_num",
        "as.numeric(w1_q_fake_news)",
        "as.numeric(w1_q_whatsapp_purposes_news)",
        "as.numeric(w1_q_whatsapp_purposes_pay_bills)",
        "as.numeric(w1_q_whatsapp_purposes_family)",
        "as.numeric(w1_q_whatsapp_purposes_groups)"
    )
    controls_rhs <- paste(controls, collapse = " + ")

    rhs <- paste(indvar, "+", controls_rhs)
    if (cace) {
        # Second part of the formula: `exp` replaces `indvar`, and the
        # controls are repeated.
        rhs <- paste(rhs, "| exp +", controls_rhs)
    }

    # Called here (not in a helper) so the formula's environment is this
    # function's frame, as before.
    as.formula(paste(depvar, "~", rhs))
}

## Per item: belief in each false rumor regressed on exposure to that rumor,
## plus the covariates from build_reg_expression() (plain OLS).
summary(lm(build_reg_expression("false_1_true", "false_1_exp"), data = d))
summary(lm(build_reg_expression("false_2_true", "false_2_exp"), data = d))
summary(lm(build_reg_expression("false_3_true", "false_3_exp"), data = d))
summary(lm(build_reg_expression("false_4_true", "false_4_exp"), data = d))

# Per item: belief regressed on `exp` plus covariates.
# NOTE(review): this section was labelled "first stage", but the outcome here
# is belief (`false_<k>_true`), not exposure (`false_<k>_exp`), so these are
# regressions of the outcome on `exp` -- confirm which one was intended.
summary(lm(build_reg_expression("false_1_true", "exp"), data = d))
summary(lm(build_reg_expression("false_2_true", "exp"), data = d))
summary(lm(build_reg_expression("false_3_true", "exp"), data = d))
summary(lm(build_reg_expression("false_4_true", "exp"), data = d))

# Per item: full IV, using the two-part formula (`... | exp + covariates`)
# and HC0 standard errors.
summary(iv_robust(
    build_reg_expression("false_1_true", "false_1_exp", cace = TRUE),
    data = d, se_type = "HC0"
))
summary(iv_robust(
    build_reg_expression("false_2_true", "false_2_exp", cace = TRUE),
    data = d, se_type = "HC0"
))
summary(iv_robust(
    build_reg_expression("false_3_true", "false_3_exp", cace = TRUE),
    data = d, se_type = "HC0"
))
summary(iv_robust(
    build_reg_expression("false_4_true", "false_4_exp", cace = TRUE),
    data = d, se_type = "HC0"
))

# All items combined (long data `d_`), standard errors clustered on
# response_id. These three objects feed the table below.

## Belief ~ exposure, no controls
itt <- lm_robust(
    true ~ exposure,
    data = d_,
    clusters = response_id
)

## Belief ~ exposure + covariates
## NOTE(review): previously labelled "first stage"; the model is the
## covariate-adjusted version of the regression above.
itt_cov <- lm_robust(
    build_reg_expression("true", "exposure"),
    data = d_,
    clusters = response_id
)

## Full IV: belief ~ exposure + covariates | exp + covariates
cace <- iv_robust(
    build_reg_expression("true", "exposure", cace = TRUE),
    data = d_,
    clusters = response_id
)

# Table -------------------------------------------------------------------

# Display labels for the coefficients shown in the table (passed as coef_map).
new_names <- c(
    "(Intercept)" = "Intercept",
    "exposure" = "Exposure to False Rumors"
)

# Extra row stating which models include controls; the `position` attribute
# tells modelsummary where to insert it.
rows <- tribble(
    ~term, ~m1, ~m2, ~m3,
    "Controls", "no", "yes", "yes"
)
attr(rows, "position") <- 4

# output
#output_ov <- "~/Dropbox/Apps/Overleaf/whatsapp_deactivation/output"

# Table packages
library(kableExtra)
library(modelsummary)

# latex version
# modelsummary(list("ITT Belief Misinfo ~ Exposure"=itt,
#                   "Cov-ITT Belief Misinfo ~ Exposure"= itt_cov, 
#                   "CACE  Belief Misinfo ~ Exposure | Deactivation" = cace),
#              output="latex",
#              fmt = 3,
#              stars = TRUE,  
#              coef_map = new_names, 
#              title = "Regression Models: Models regressing False Rumors Accuracy on False Rumors Exposure for all items. The first column presents simple OLS model with no controls. The second column presents Covariates-Adjusted Model. The third columns presents an instrumental variable estimation with exposure as the endogenous variable. The results indicate exposure strongly predicts belief for misinformation. However, when removing endogenous components using the instrumental variable model, the effects disappear.",
#              gof_omit = 'AIC|BIC', 
#              add_rows = rows) %>%
#     footnote(general = "Robust standard errors in Parentheses", 
#              threeparttable = TRUE)   %>%
#     save_kable(file =  "output/sm_tab16.tex")

# Default-output version of the same table; use it if the LaTeX version
# raises an issue.
table_models <- list(
    "ITT Belief Misinfo ~ Exposure" = itt,
    "Cov-ITT Belief Misinfo ~ Exposure" = itt_cov,
    "CACE  Belief Misinfo ~ Exposure | Deactivation" = cace
)

table_title <- "Regression Models: Models regressing False Rumors Accuracy on False Rumors Exposure for all items. The first column presents simple OLS model with no controls. The second column presents Covariates-Adjusted Model. The third columns presents an instrumental variable estimation with exposure as the endogenous variable. The results indicate exposure strongly predicts belief for misinformation. However, when removing endogenous components using the instrumental variable model, the effects disappear."

modelsummary(
    table_models,
    fmt = 3,
    stars = TRUE,
    coef_map = new_names,
    title = table_title,
    gof_omit = "AIC|BIC",
    add_rows = rows
)
